home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Games of Daze
/
Infomagic - Games of Daze (Summer 1995) (Disc 1 of 2).iso
/
x2ftp
/
msdos
/
source
/
vr386
/
sqrti.asm
< prev
next >
Wrap
Assembly Source File
|
1994-01-10
|
7KB
|
441 lines
TITLE SQRTI - INTEGER SQRT AND MAGNITUDE
COMMENT $
/* Routines for integer sqrt and magnitude for REND386 */
// All code by Dave Stampe, last updated 23/12/93
// These routines are (c) 1993 by Dave Stampe
/*
This code is part of the VR-386 project, created by Dave Stampe.
VR-386 is a descendant of REND386, created by Dave Stampe and
Bernie Roehl. Almost all the code has been rewritten by Dave
Stampe for VR-386.
Copyright (c) 1994 by Dave Stampe:
May be freely used to write software for release into the public domain
or for educational use; all commercial endeavours MUST contact Dave Stampe
(dstampe@psych.toronto.edu) for permission to incorporate any part of
this software or source code into their products! Usually there is no
charge for under 50-100 items for low-cost or shareware products, and terms
are reasonable. Any royalties are used for development, so equipment is
often acceptable payment.
ATTRIBUTION: If you use any part of this source code or the libraries
in your projects, you must give attribution to VR-386 and Dave Stampe,
and any other authors in your documentation, source code, and at startup
of your program. Let's keep the freeware ball rolling!
DEVELOPMENT: VR-386 is an effort to develop the process started by
REND386, improving programmer access by rewriting the code and supplying
a standard API. If you write improvements, add new functions rather
than rewriting current functions. This will make it possible to
include your improved code in the next API release. YOU can help advance
VR-386. Comments on the API are welcome.
CONTACT: dstampe@psych.toronto.edu
*/
$
; Large memory model: the procedures below are declared "proc far" and
; dereference their pointer arguments as far pointers (les).
.MODEL large
; Everything that follows is assembled into the code segment named INTMATH.
.CODE INTMATH
; SQRT32: one step of the bit-by-bit 32->16 bit square root.
;   eax = input bits still to be consumed, left-justified
;   edx = running remainder
;   ebx = root built so far
;   ecx = scratch (trial value)
; Each expansion consumes the top two bits of eax and appends one bit to ebx.
; The compare is signed; that is safe for the at most 16 steps used by
; _squareroot32 because remainder and trial value stay far below 2^31.
SQRT32 MACRO
LOCAL no_bit
	shl	ebx,1			;; make room for the new root bit (starts as 0)
	shld	edx,eax,2		;; remainder = remainder*4 + next two input bits
	shl	eax,2			;; discard the two bits just taken
	mov	ecx,ebx
	shl	ecx,1			;; ecx = 4 * previous root
	cmp	ecx,edx
	jge	no_bit			;; remainder <= 4*root: new bit stays 0
	inc	ecx			;; amount to remove = 4*root + 1
	sub	edx,ecx
	inc	ebx			;; new root bit is 1
no_bit:
ENDM
;long squareroot32(long arg)
;  Integer square root of a 32 bit argument, giving a 16 bit result.
;  All 32 bits of arg are consumed as magnitude; there is no sign handling.
;  Far procedure, argument on the stack (first argument at [bp+8]).
;  Result is returned in eax and, for 16 bit callers, also in dx:ax.
;  ecx and esi are saved and restored; ebx is used as scratch and is not.
;  Leading zero bytes of the argument are skipped, so the number of
;  SQRT32 steps run is 16, 12, 8 or 4.
;  about 220 clocks worst case:
;  3 us on 486/66 and 10 us on 386/25
larg equ DWORD PTR [bp+8]
PUBLIC _squareroot32
_squareroot32 proc far
.386
	push	ebp
	mov	ebp,esp
	push	esi
	push	ecx

	mov	eax,DWORD PTR larg
	xor	ebx,ebx			; root = 0
	xor	edx,edx			; remainder = 0

	test	eax,0FFFF0000h		; anything in the upper word?
	jz	sr32_low_word
	test	eax,0FF000000h		; anything in the top byte?
	jnz	sr32_steps16
	shl	eax,8			; top byte empty: left-justify 24 bits
	jmp	sr32_steps12

sr32_low_word:
	shl	eax,16			; upper word empty: left-justify 16 bits
	test	eax,0FF000000h		; is the remaining top byte empty too?
	jnz	sr32_steps8
	shl	eax,8			; yes: left-justify the last 8 bits
	jmp	sr32_steps4

sr32_steps16:				; full 32 bit argument: 16 steps
	SQRT32
	SQRT32
	SQRT32
	SQRT32
sr32_steps12:				; 24 significant bits: 12 steps
	SQRT32
	SQRT32
	SQRT32
	SQRT32
sr32_steps8:				; 16 significant bits: 8 steps
	SQRT32
	SQRT32
	SQRT32
	SQRT32
sr32_steps4:				; 8 significant bits: 4 steps
	SQRT32
	SQRT32
	SQRT32
	SQRT32

	mov	eax,ebx			; result in eax ...
	shld	edx,eax,16		; ... and its high word in dx (dx:ax form)

	pop	ecx
	pop	esi
	mov	esp,ebp
	pop	ebp
	ret
_squareroot32 endp
; SQRT64: one step of the bit-by-bit 62->31 bit square root.
;   edx:eax = input bits still to be consumed, left-justified
;   edi     = running remainder
;   ebx     = root built so far
;   ecx     = scratch (trial value)
; Each expansion consumes the top two bits of edx:eax and appends one
; bit to ebx.
;
; The remainder is at most twice the root, and the root is below 2^30
; on entry to every one of the (at most) 31 steps, so:
;   - the remainder and trial value can have bit 31 set, which is why the
;     compare must be UNSIGNED (a signed compare drops root bits for
;     large arguments, e.g. 2^62-1);
;   - after the shift the remainder can need 33 bits.  Bit 31 of the old
;     remainder is always clear, so the carry left by SHLD (the last bit
;     shifted out, i.e. old bit 30) is exactly that 33rd bit.  When it
;     is set the remainder certainly exceeds the trial value, and the
;     32 bit subtraction below still yields the correct new remainder.
; The input shift is done last so that it cannot disturb that carry.
SQRT64 MACRO
LOCAL take, skip
	add	ebx,ebx			;; make room for the new root bit (starts as 0)
	mov	ecx,ebx
	add	ecx,ecx			;; ecx = 4 * previous root (fits in 32 bits)
	shld	edi,edx,2		;; remainder = remainder*4 + next two input bits
	jc	take			;; 33 bit remainder: always larger than the trial
	cmp	edi,ecx			;; remainder > 4*root ?  (unsigned)
	jbe	skip			;; no: new bit stays 0
take:
	inc	ebx			;; new root bit is 1
	inc	ecx			;; amount to remove = 4*root + 1
	sub	edi,ecx
skip:
	shld	edx,eax,2		;; discard the two bits just taken
	shl	eax,2
ENDM
;long squareroot62(long hiarg, long loarg)
; takes root of the 62 bit number hiarg:loarg to a 31 bit result
; The top two bits of hiarg are shifted out by the prescale below and
; are therefore ignored.
; Far procedure, arguments on the stack (hiarg at [bp+8], loarg at [bp+12]).
; Result is returned in eax and, for 16 bit callers, also in dx:ax.
; ecx, esi and edi are preserved; ebx is used as scratch and is not.
; When hiarg is zero the work is handed to _squareroot32.
; Otherwise leading zero bytes are skipped and 31, 27, 23 or 19
; SQRT64 steps are run (entry labels do64, do56, do48, do40).
; about 500 clocks worst case:
; 8 us on 486/66 and 20 us on 386/25
hiarg equ DWORD PTR [bp+8]
loarg equ DWORD PTR [bp+12]
PUBLIC _squareroot62
_squareroot62 proc far
.386
push ebp
mov ebp,esp
mov edx,DWORD PTR hiarg
mov eax,DWORD PTR loarg
or edx,edx
jne dohigh
push eax ; high dword is zero: can use short root!
call _squareroot32
add esp,4 ; discard the pushed argument
mov esp,ebp
pop ebp
ret
dohigh:
push ecx ; have to do 2 dwords
push esi
push edi
xor edi,edi ; remainder = 0
xor ebx,ebx ; root = 0
shld edx,eax,2 ; prescale for 62 bits in 64 bit word
shl eax,2
test edx,0FFFF0000h ; can we cut it in half?
jne hashigh
shld edx,eax,16 ; yes, so prescale by 16 bits
shl eax,16
test edx,0FF000000h ; half again?
jne do48 ; no: 46 significant bits left, enter at do48
shld edx,eax,8
shl eax,8 ; yes, prescale by 8 more bits
jmp do40 ; 38 significant bits left, enter at do40
hashigh:
test edx,0FF000000h ; top byte empty?
jne do64 ; no: all 62 bits are needed
shld edx,eax,8 ; yes, prescale by 8 bits
shl eax,8
jmp do56 ; 54 significant bits left, enter at do56
do64: ; 31 steps from here
SQRT64
SQRT64
SQRT64
SQRT64
do56: ; 27 steps from here
SQRT64
SQRT64
SQRT64
SQRT64
do48: ; 23 steps from here
SQRT64
SQRT64
SQRT64
SQRT64
do40: ; 19 steps from here
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
SQRT64
; one missing because of prescale
mov eax,ebx
pop edi
pop esi
pop ecx
shld edx,eax,16 ; returns in both eax and dx:ax
mov esp,ebp
pop ebp
ret
_squareroot62 endp
;long magnitude32(long x, long y, long z)
;  Length of the vector (x,y,z): the three signed 32 bit components are
;  squared, the squares are summed as a 64 bit value in ecx:ebx, and the
;  sum is handed to _squareroot62 (high dword first, low dword second).
;  The result comes back exactly as _squareroot62 returns it.
;  ecx is preserved; ebx is used as scratch and is not.
;  no scaling or shortcuts: does 3x32-bit multiplies
;  time: worst case of 650 clocks, best case of 200
;  3 to 10 us on 486/66, 8 to 25 us on 386/25
x equ DWORD PTR [bp+8]
y equ DWORD PTR [bp+12]
z equ DWORD PTR [bp+16]
PUBLIC _magnitude32
_magnitude32 proc far
.386
	push	ebp
	mov	ebp,esp
	push	ecx

	mov	eax,z
	imul	eax			; edx:eax = z*z
	mov	ecx,edx			; ecx:ebx = running 64 bit sum
	mov	ebx,eax

	mov	eax,y
	imul	eax			; edx:eax = y*y
	add	ebx,eax
	adc	ecx,edx

	mov	eax,x
	imul	eax			; edx:eax = x*x
	add	ebx,eax
	adc	ecx,edx

	push	ebx			; loarg = low dword of the sum
	push	ecx			; hiarg = high dword of the sum
	call	_squareroot62
	add	esp,8			; drop both arguments

	pop	ecx
	mov	esp,ebp
	pop	ebp
	ret
_magnitude32 endp
;long magnitude16(int x, int y, int z)
;  Length of the vector (x,y,z): the three signed 16 bit components are
;  squared, the squares are summed as a 32 bit value in cx:bx, and the
;  sum is handed to _squareroot32.
;  The result comes back exactly as _squareroot32 returns it.
;  ecx is preserved; ebx is used as scratch and is not.
;  no scaling or shortcuts: does 3x16-bit multiplies
;  time: worst case of 300 clocks, best case of 150
;  2 to 5 us on 486/66, 6 to 12 us on 386/25
x equ WORD PTR [bp+8]
y equ WORD PTR [bp+10]
z equ WORD PTR [bp+12]
PUBLIC _magnitude16
_magnitude16 proc far
.386
	push	ebp
	mov	ebp,esp
	push	ecx

	mov	ax,z
	imul	ax			; dx:ax = z*z
	mov	cx,dx			; cx:bx = running 32 bit sum
	mov	bx,ax

	mov	ax,y
	imul	ax			; dx:ax = y*y
	add	bx,ax
	adc	cx,dx

	mov	ax,x
	imul	ax			; dx:ax = x*x
	add	bx,ax
	adc	cx,dx

	push	cx			; high word of the sum
	push	bx			; low word: together one 32 bit argument
	call	_squareroot32
	add	esp,4			; drop the argument

	pop	ecx
	mov	esp,ebp
	pop	ebp
	ret
_magnitude16 endp
;void set_vector_magnitude32(long length, long *xp, long *yp, long *zp)
;  Rescales the vector stored at *xp, *yp, *zp so that its magnitude
;  becomes length: each component is replaced by
;  component * length / magnitude, where magnitude is what _magnitude32
;  returns for the current components.
;  xp, yp and zp are far pointers (loaded with les).
;  If the current magnitude is zero the vector is left untouched.
;  ecx, edi and esi are preserved; ebx and es are used as scratch.
length equ DWORD PTR [bp+8]
xp equ DWORD PTR [bp+12]
yp equ DWORD PTR [bp+16]
zp equ DWORD PTR [bp+20]

; RESCALE: replace the long behind one far pointer argument by
; (value * length) / esi.  Uses eax, edx, bx and es.
RESCALE MACRO component
	les	bx,component
	mov	eax,es:[bx]
	imul	length			;; edx:eax = value * length
	idiv	esi			;; eax = edx:eax / magnitude
	mov	es:[bx],eax
ENDM

PUBLIC _set_vector_magnitude32
_set_vector_magnitude32 proc far
.386
	push	ebp
	mov	ebp,esp
	sub	esp,20
	push	ecx
	push	edi
	push	esi

	les	bx,xp			; pass the three current components
	push	DWORD PTR es:[bx]
	les	bx,yp
	push	DWORD PTR es:[bx]
	les	bx,zp
	push	DWORD PTR es:[bx]
	call	_magnitude32
	add	esp,12

	mov	esi,eax			; esi = current magnitude (the divisor)
	test	eax,eax
	jz	zero_magnitude		; nothing to scale, and no divide by zero

	RESCALE	xp
	RESCALE	yp
	RESCALE	zp

zero_magnitude:
	pop	esi
	pop	edi
	pop	ecx
	mov	esp,ebp
	pop	ebp
	ret
_set_vector_magnitude32 endp
end